Skip to content

Commit

Permalink
Make dbinit.sql slightly less order-dependent (#4288)
Browse files Browse the repository at this point in the history
This PR does two things:

1. It avoids checking for the order-dependent NOT NULL constraint in
information schema, opting to parse the output of `SHOW CONSTRAINTS` and
[information_schema.columns.is_nullable](https://www.cockroachlabs.com/docs/v23.1/information-schema#columns)
instead.
2. It re-orders the db_metadata creation to the end of dbinit.sql
(mostly for clarity)

Fixes #4286
  • Loading branch information
smklein authored Oct 18, 2023
1 parent 5346472 commit 463cc1a
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 123 deletions.
175 changes: 115 additions & 60 deletions nexus/tests/integration_tests/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,35 @@ impl<'a> From<&'a [&'static str]> for ColumnSelector<'a> {
}
}

async fn query_crdb_for_rows_of_strings(
/// Runs `SHOW CONSTRAINTS FROM <table>` against the supplied CockroachDB
/// instance and returns each result row as a [`Row`] of named SQL values.
///
/// This is test-support code: any connection, query, or cleanup failure
/// panics loudly rather than being propagated.
async fn crdb_show_constraints(
    crdb: &CockroachInstance,
    table: &str,
) -> Vec<Row> {
    let client = crdb.connect().await.expect("failed to connect");

    let sql = format!("SHOW CONSTRAINTS FROM {table}");
    let rows = client
        .query(&sql, &[])
        .await
        // Include the underlying error in the panic message so failures
        // are diagnosable, instead of discarding it with `|_|`.
        .unwrap_or_else(|e| panic!("failed to query {table}: {e}"));
    // NOTE(review): message previously said "cleaning up after wipe",
    // which was a copy-paste artifact — no wipe happens in this helper.
    client.cleanup().await.expect("failed to clean up connection");

    let mut result = vec![];
    for row in rows {
        let mut row_result = Row::new();
        for i in 0..row.len() {
            let column_name = row.columns()[i].name();
            row_result.values.push(NamedSqlValue {
                column: column_name.to_string(),
                value: row.get(i),
            });
        }
        result.push(row_result);
    }
    result
}

async fn crdb_select(
crdb: &CockroachInstance,
columns: ColumnSelector<'_>,
table: &str,
Expand Down Expand Up @@ -453,22 +481,16 @@ async fn versions_have_idempotent_up() {
logctx.cleanup_successful();
}

const COLUMNS: [&'static str; 6] = [
/// Columns selected from `information_schema.columns`.
///
/// `'static` is implied for references in a `const` item, so it is
/// omitted (clippy: `redundant_static_lifetimes`).
const COLUMNS: [&str; 7] = [
    "table_catalog",
    "table_schema",
    "table_name",
    "column_name",
    "column_default",
    "is_nullable",
    "data_type",
];

const CHECK_CONSTRAINTS: [&'static str; 4] = [
"constraint_catalog",
"constraint_schema",
"constraint_name",
"check_clause",
];

const CONSTRAINT_COLUMN_USAGE: [&'static str; 7] = [
"table_catalog",
"table_schema",
Expand Down Expand Up @@ -538,22 +560,9 @@ const PG_INDEXES: [&'static str; 5] =
/// Columns selected from `information_schema.tables`.
///
/// `'static` is implied for references in a `const` item, so it is
/// omitted (clippy: `redundant_static_lifetimes`).
const TABLES: [&str; 4] =
    ["table_catalog", "table_schema", "table_name", "table_type"];

const TABLE_CONSTRAINTS: [&'static str; 9] = [
"constraint_catalog",
"constraint_schema",
"constraint_name",
"table_catalog",
"table_schema",
"table_name",
"constraint_type",
"is_deferrable",
"initially_deferred",
];

#[derive(Eq, PartialEq, Debug)]
struct InformationSchema {
columns: Vec<Row>,
check_constraints: Vec<Row>,
constraint_column_usage: Vec<Row>,
key_column_usage: Vec<Row>,
referential_constraints: Vec<Row>,
Expand All @@ -562,7 +571,7 @@ struct InformationSchema {
sequences: Vec<Row>,
pg_indexes: Vec<Row>,
tables: Vec<Row>,
table_constraints: Vec<Row>,
table_constraints: BTreeMap<String, Vec<Row>>,
}

impl InformationSchema {
Expand All @@ -576,10 +585,6 @@ impl InformationSchema {
self.table_constraints,
other.table_constraints
);
similar_asserts::assert_eq!(
self.check_constraints,
other.check_constraints
);
similar_asserts::assert_eq!(
self.constraint_column_usage,
other.constraint_column_usage
Expand All @@ -602,97 +607,83 @@ impl InformationSchema {
// https://www.cockroachlabs.com/docs/v23.1/information-schema
//
// For details on each of these tables.
let columns = query_crdb_for_rows_of_strings(
let columns = crdb_select(
crdb,
COLUMNS.as_slice().into(),
"information_schema.columns",
Some("table_schema = 'public'"),
)
.await;

let check_constraints = query_crdb_for_rows_of_strings(
crdb,
CHECK_CONSTRAINTS.as_slice().into(),
"information_schema.check_constraints",
None,
)
.await;

let constraint_column_usage = query_crdb_for_rows_of_strings(
let constraint_column_usage = crdb_select(
crdb,
CONSTRAINT_COLUMN_USAGE.as_slice().into(),
"information_schema.constraint_column_usage",
None,
)
.await;

let key_column_usage = query_crdb_for_rows_of_strings(
let key_column_usage = crdb_select(
crdb,
KEY_COLUMN_USAGE.as_slice().into(),
"information_schema.key_column_usage",
None,
)
.await;

let referential_constraints = query_crdb_for_rows_of_strings(
let referential_constraints = crdb_select(
crdb,
REFERENTIAL_CONSTRAINTS.as_slice().into(),
"information_schema.referential_constraints",
None,
)
.await;

let views = query_crdb_for_rows_of_strings(
let views = crdb_select(
crdb,
VIEWS.as_slice().into(),
"information_schema.views",
None,
)
.await;

let statistics = query_crdb_for_rows_of_strings(
let statistics = crdb_select(
crdb,
STATISTICS.as_slice().into(),
"information_schema.statistics",
None,
)
.await;

let sequences = query_crdb_for_rows_of_strings(
let sequences = crdb_select(
crdb,
SEQUENCES.as_slice().into(),
"information_schema.sequences",
None,
)
.await;

let pg_indexes = query_crdb_for_rows_of_strings(
let pg_indexes = crdb_select(
crdb,
PG_INDEXES.as_slice().into(),
"pg_indexes",
Some("schemaname = 'public'"),
)
.await;

let tables = query_crdb_for_rows_of_strings(
let tables = crdb_select(
crdb,
TABLES.as_slice().into(),
"information_schema.tables",
Some("table_schema = 'public'"),
)
.await;

let table_constraints = query_crdb_for_rows_of_strings(
crdb,
TABLE_CONSTRAINTS.as_slice().into(),
"information_schema.table_constraints",
Some("table_schema = 'public'"),
)
.await;
let table_constraints =
Self::show_constraints_all_tables(&tables, crdb).await;

Self {
columns,
check_constraints,
constraint_column_usage,
key_column_usage,
referential_constraints,
Expand All @@ -705,6 +696,33 @@ impl InformationSchema {
}
}

/// Collects `SHOW CONSTRAINTS` output for every base table in `tables`.
///
/// `tables` holds rows from `information_schema.tables` in the column
/// order of the `TABLES` selector: catalog, schema, name, type. Views and
/// other non-`BASE TABLE` entries are skipped. Returns a map keyed by
/// fully-qualified table name, so comparisons are independent of
/// table-creation order.
async fn show_constraints_all_tables(
    // `&[Row]` accepts any slice; existing `&Vec<Row>` call sites coerce
    // automatically (clippy: `ptr_arg`).
    tables: &[Row],
    crdb: &CockroachInstance,
) -> BTreeMap<String, Vec<Row>> {
    let mut map = BTreeMap::new();

    for table in tables {
        let table = &table.values;
        let table_catalog =
            table[0].expect("table_catalog").unwrap().as_str();
        let table_schema =
            table[1].expect("table_schema").unwrap().as_str();
        let table_name = table[2].expect("table_name").unwrap().as_str();
        let table_type = table[3].expect("table_type").unwrap().as_str();

        // Constraints only exist on real tables, not views.
        if table_type != "BASE TABLE" {
            continue;
        }

        let table_name =
            format!("{}.{}.{}", table_catalog, table_schema, table_name);
        let rows = crdb_show_constraints(crdb, &table_name).await;
        map.insert(table_name, rows);
    }
    map
}

// This would normally be quite an expensive operation, but we expect it'll
// at least be slightly cheaper for the freshly populated DB, which
// shouldn't have that many records yet.
Expand All @@ -731,13 +749,9 @@ impl InformationSchema {
let table_name =
format!("{}.{}.{}", table_catalog, table_schema, table_name);
info!(log, "Querying table: {table_name}");
let rows = query_crdb_for_rows_of_strings(
crdb,
ColumnSelector::Star,
&table_name,
None,
)
.await;
let rows =
crdb_select(crdb, ColumnSelector::Star, &table_name, None)
.await;
info!(log, "Saw data: {rows:?}");
map.insert(table_name, rows);
}
Expand Down Expand Up @@ -1012,6 +1026,47 @@ async fn compare_table_differing_constraint() {
)
.await;

assert_ne!(schema1.check_constraints, schema2.check_constraints);
assert_ne!(schema1.table_constraints, schema2.table_constraints);
logctx.cleanup_successful();
}

// Two databases with identical tables created in a *different order* must
// produce equal schema snapshots. CockroachDB assigns order-dependent
// internal names to NOT NULL constraints; this test confirms the
// comparison machinery ignores that ordering.
#[tokio::test]
async fn compare_table_differing_not_null_order() {
    let config = load_test_config();
    let logctx = LogContext::new(
        "compare_table_differing_not_null_order",
        &config.pkg.log,
    );
    let log = &logctx.log;

    // "pet" is created before "employee" here...
    let pets_first = get_information_schema(
        log,
        "
        CREATE DATABASE omicron;
        CREATE TABLE omicron.public.pet ( id UUID PRIMARY KEY );
        CREATE TABLE omicron.public.employee (
            id UUID PRIMARY KEY,
            name TEXT NOT NULL,
            hobbies TEXT
        );
        ",
    )
    .await;

    // ...and after "employee" here. Everything else is identical.
    let pets_last = get_information_schema(
        log,
        "
        CREATE DATABASE omicron;
        CREATE TABLE omicron.public.employee (
            id UUID PRIMARY KEY,
            name TEXT NOT NULL,
            hobbies TEXT
        );
        CREATE TABLE omicron.public.pet ( id UUID PRIMARY KEY );
        ",
    )
    .await;

    pets_first.pretty_assert_eq(&pets_last);
    logctx.cleanup_successful();
}
41 changes: 9 additions & 32 deletions schema/crdb/README.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -73,38 +73,15 @@ SQL Validation, via Automated Tests:

==== Handling common schema changes

CockroachDB's schema includes a description of all of the database's CHECK
constraints. If a CHECK constraint is anonymous (i.e. it is written simply as
`CHECK <expression>` and not `CONSTRAINT <name> CHECK expression`), CRDB
assigns it a name based on the table and column to which the constraint applies.
The challenge is that CRDB identifies tables and columns using opaque
identifiers whose values depend on the order in which tables and views were
defined in the current database. This means that adding, removing, or renaming
objects needs to be done carefully to preserve the relative ordering of objects
in new databases created by `dbinit.sql` and upgraded databases created by
applying `up.sql` transformations.

===== Adding new columns with constraints

Strongly consider naming new constraints (`CONSTRAINT <name> <expression>`) to
avoid the problems with anonymous constraints described above.

===== Adding tables and views

New tables and views must be added to the end of `dbinit.sql` so that the order
of preceding `CREATE` statements is left unchanged. If your changes fail the
`CHECK` constraints test and you get a constraint name diff like this...

```
NamedSqlValue {
column: "constraint_name",
value: Some(
String(
< "4101115737_149_10_not_null",
> "4101115737_148_10_not_null",
```

...then you've probably inadvertently added a table or view in the wrong place.
Although CockroachDB's schema includes some opaque internally-generated fields
that are order dependent - such as the names of anonymous CHECK constraints -
our schema comparison tools intentionally ignore these values. As a result,
when performing schema changes, the order of new tables and constraints should
generally not be important.

As convention, however, we recommend keeping the `db_metadata` table at the end
of `dbinit.sql`, so that the database does not contain a version until it is
fully populated.

==== Adding new source tables to an existing view

Expand Down
Loading

0 comments on commit 463cc1a

Please sign in to comment.