From d305fb4a37f8ec414a19c5f1c0645ad762648136 Mon Sep 17 00:00:00 2001 From: Deepthi Srinivasan Date: Mon, 20 Dec 2021 22:20:30 +0000 Subject: [PATCH] [#6149] [#10587] YSQL: Partitioned table primary key is not correctly inherited by partitions Summary: When a partition table is created, it should inherit the primary key from the parent partitioned table. However, inheriting the primary key is done after creating the table on the DocDB side. Thus, the table gets created on DocDB without an explicit primary key. This can be observed by creating a child partition and checking DocDB schema, which will show that the partition key is the auto-generated ybrowid. This has other side effects like the following: ``` CREATE TABLE test(h1 int NOT NULL, h2 int NOT NULL, h3 int NOT NULL, v1 int, primary key(h1, h2, h3)) PARTITION BY HASH(h1); CREATE TABLE test_1 PARTITION of test FOR VALUES WITH (modulus 2, remainder 0); yugabyte=# EXPLAIN SELECT * FROM test WHERE h1 = 1 AND h2 = 4 AND h3 = 3; QUERY PLAN ------------------------------------------------------------------ Append (cost=0.00..112.50 rows=1000 width=16) -> Seq Scan on test_1 (cost=0.00..107.50 rows=1000 width=16). // This should be an IndexScan as the query populates all the columns in the PK Filter: ((h1 = 1) AND (h2 = 4) AND (h3 = 3)) (3 rows) ``` So long, this has been worked around by specifying a primary key explicitly while creating the child partitions. In this patch, before sending the RPC to DocDB, find whether this is is a partition table. If so, inherit the primary key from the parent table. Additionally another minor bug has been fixed in generateClonedIndexStatement. It assigns the ordering of the index to SORTBY_DEFAULT if it is not a hash index. This usually works fine as SORTBY_DEFAULT usually correctly defaults to ASC. However for the first column of a primary key, SORTBY_DEFAULT defaults to hash for YB. Therefore if the parent partitioned table had been created with a primary key where the first column was ASC, without the fix, the child partition's first column of pk would default to SORTBY_HASH. Test Plan: ybd --scb --sj --java-test org.yb.pgsql.TestPgRegressPartitions Reviewers: alex Reviewed By: alex Subscribers: yql Differential Revision: https://phabricator.dev.yugabyte.com/D14069 --- .../org/yb/pgsql/TestPgRegressPartitions.java | 2 +- src/postgres/src/backend/commands/ybccmds.c | 68 +++++- .../src/backend/parser/parse_utilcmd.c | 10 + .../test/regress/expected/yb_partition_pk.out | 212 ++++++++++++++++++ .../expected/yb_pg_partition_update.out | 2 + .../src/test/regress/sql/yb_partition_pk.sql | 107 +++++++++ .../regress/sql/yb_pg_partition_update.sql | 1 + .../test/regress/yb_pg_partitions_schedule | 1 + 8 files changed, 401 insertions(+), 2 deletions(-) create mode 100644 src/postgres/src/test/regress/expected/yb_partition_pk.out create mode 100644 src/postgres/src/test/regress/sql/yb_partition_pk.sql diff --git a/java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgRegressPartitions.java b/java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgRegressPartitions.java index 5a59386ab123..91fd391bec9b 100644 --- a/java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgRegressPartitions.java +++ b/java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgRegressPartitions.java @@ -23,7 +23,7 @@ public class TestPgRegressPartitions extends BasePgSQLTest { @Override public int getTestMethodTimeoutSec() { - return 1800; + return 3000; } @Test diff --git a/src/postgres/src/backend/commands/ybccmds.c b/src/postgres/src/backend/commands/ybccmds.c index 73cb43b2bf95..b9cad09e469c 100644 --- a/src/postgres/src/backend/commands/ybccmds.c +++ b/src/postgres/src/backend/commands/ybccmds.c @@ -28,6 +28,7 @@ #include "access/xact.h" #include "catalog/catalog.h" #include "catalog/index.h" +#include "catalog/namespace.h" #include "catalog/pg_am.h" #include "catalog/pg_attribute.h" #include "catalog/pg_class.h" @@ -38,9 +39,9 @@ #include "catalog/pg_type_d.h" #include "catalog/yb_type.h" #include "commands/dbcommands.h" -#include "commands/ybccmds.h" #include "commands/tablegroup.h" #include "commands/tablecmds.h" +#include "commands/ybccmds.h" #include "access/htup_details.h" #include "utils/builtins.h" @@ -492,6 +493,71 @@ YBCCreateTable(CreateStmt *stmt, char relkind, TupleDesc desc, } } + /* + * If this is a partition table, check whether it needs to inherit the same + * primary key as the parent partitioned table. + */ + if (primary_key == NULL && stmt->partbound) + { + /* + * This relation is not created yet and not visible to other + * backends. It doesn't really matter what lock we take here. + */ + Relation rel = relation_open(relationId, AccessShareLock); + + /* Find the parent partitioned table */ + RangeVar *rv = (RangeVar *) lfirst(list_head(stmt->inhRelations)); + Oid parentOid = RangeVarGetRelid(rv, NoLock, false); + + Relation parentRel = heap_open(parentOid, NoLock); + List *idxlist = RelationGetIndexList(parentRel); + ListCell *cell; + foreach(cell, idxlist) + { + Relation idxRel = index_open(lfirst_oid(cell), AccessShareLock); + /* Fetch pg_index tuple for source index from relcache entry */ + if (!((Form_pg_index) GETSTRUCT(idxRel->rd_indextuple))->indisprimary) + { + /* + * This is not a primary key index so this doesn't matter. + */ + relation_close(idxRel, AccessShareLock); + continue; + } + AttrNumber *attmap; + IndexStmt *idxstmt; + Oid constraintOid; + + attmap = convert_tuples_by_name_map(RelationGetDescr(rel), + RelationGetDescr(parentRel), + gettext_noop("could not convert row type")); + idxstmt = + generateClonedIndexStmt(NULL, RelationGetRelid(rel), idxRel, + attmap, RelationGetDescr(rel)->natts, + &constraintOid); + + primary_key = makeNode(Constraint); + primary_key->contype = CONSTR_PRIMARY; + primary_key->conname = idxstmt->idxname; + primary_key->options = idxstmt->options; + primary_key->indexspace = idxstmt->tableSpace; + + ListCell *idxcell; + foreach(idxcell, idxstmt->indexParams) + { + IndexElem* ielem = lfirst(idxcell); + primary_key->keys = + lappend(primary_key->keys, makeString(ielem->name)); + primary_key->yb_index_params = + lappend(primary_key->yb_index_params, ielem); + } + + relation_close(idxRel, AccessShareLock); + } + heap_close(parentRel, NoLock); + heap_close(rel, AccessShareLock); + } + /* By default, inherit the colocated option from the database */ bool colocated = MyDatabaseColocated; diff --git a/src/postgres/src/backend/parser/parse_utilcmd.c b/src/postgres/src/backend/parser/parse_utilcmd.c index 46a49677f1db..9f193e936357 100644 --- a/src/postgres/src/backend/parser/parse_utilcmd.c +++ b/src/postgres/src/backend/parser/parse_utilcmd.c @@ -1768,6 +1768,16 @@ generateClonedIndexStmt(RangeVar *heapRel, Oid heapRelid, Relation source_idx, { if (opt & INDOPTION_NULLS_FIRST) iparam->nulls_ordering = SORTBY_NULLS_FIRST; + + /* + * If the index supports ordering and it is neither + * SORTBY_HASH nor SORTBY_DESC, then the source index must have + * been SORTBY_ASC. If we do not set it here, during index + * creation, the ordering for the first attribute will wrongly + * default to SORTBY_HASH. + */ + if (iparam->ordering == SORTBY_DEFAULT) + iparam->ordering = SORTBY_ASC; } } diff --git a/src/postgres/src/test/regress/expected/yb_partition_pk.out b/src/postgres/src/test/regress/expected/yb_partition_pk.out new file mode 100644 index 000000000000..fd91617ee764 --- /dev/null +++ b/src/postgres/src/test/regress/expected/yb_partition_pk.out @@ -0,0 +1,212 @@ +-- Verify that for all 3 types of partitioning, the primary key +-- is correctly inherited by the partitions. +-- Hash partitioning where primary key contains both hash and range components. +CREATE TABLE test(h1 int NOT NULL, + h2 int NOT NULL, + h3 int NOT NULL, + v1 int, + primary key(h1, h2, h3)) +PARTITION BY HASH(h1); +-- Create a partition table. +CREATE TABLE test_1 PARTITION of test FOR VALUES WITH (modulus 2, remainder 0); +-- Create a table without a primary key and attach it as a partition. +CREATE TABLE test_2(h1 int NOT NULL, + h2 int NOT NULL, + h3 int NOT NULL, + v1 int); +ALTER TABLE test ATTACH PARTITION test_2 FOR VALUES WITH (modulus 2, remainder 1); +\d test_1; + Table "public.test_1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + h1 | integer | | not null | + h2 | integer | | not null | + h3 | integer | | not null | + v1 | integer | | | +Partition of: test FOR VALUES WITH (modulus 2, remainder 0) +Indexes: + "test_1_pkey" PRIMARY KEY, lsm (h1 HASH, h2 ASC, h3 ASC) + +\d test_2; + Table "public.test_2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + h1 | integer | | not null | + h2 | integer | | not null | + h3 | integer | | not null | + v1 | integer | | | +Partition of: test FOR VALUES WITH (modulus 2, remainder 1) +Indexes: + "test_2_pkey" PRIMARY KEY, lsm (h1 HASH, h2 ASC, h3 ASC) + +INSERT INTO test VALUES (1, 4, 3); +-- Fail because primary key constraint is violated. +INSERT INTO test VALUES (1, 4, 3); +ERROR: duplicate key value violates unique constraint "test_1_pkey" +EXPLAIN (COSTS OFF) SELECT * FROM test WHERE h1 = 1 AND h2 = 4 AND h3 = 3; + QUERY PLAN +---------------------------------------------------------- + Append + -> Index Scan using test_1_pkey on test_1 + Index Cond: ((h1 = 1) AND (h2 = 4) AND (h3 = 3)) +(3 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test WHERE h1 = 1 ORDER BY h2; + QUERY PLAN +---------------------------------------------- + Merge Append + Sort Key: test_1.h2 + -> Index Scan using test_1_pkey on test_1 + Index Cond: (h1 = 1) +(4 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test WHERE h1 = 1 AND h2 = 4 ORDER BY h3; + QUERY PLAN +---------------------------------------------- + Merge Append + Sort Key: test_1.h3 + -> Index Scan using test_1_pkey on test_1 + Index Cond: ((h1 = 1) AND (h2 = 4)) +(4 rows) + +-- LIST partitioning where primary key contain only hash components. +CREATE TABLE person ( + person_id int not null, + country text, + name text, + age int, + PRIMARY KEY((person_id, country) HASH)) +PARTITION BY LIST (country); +CREATE TABLE person_americas + PARTITION OF person + FOR VALUES IN ('United States', 'Brazil', 'Mexico', 'Columbia'); +CREATE TABLE person_apac ( + person_id int not null, + country text not null, + name text, + age int); +ALTER TABLE person ATTACH PARTITION person_apac FOR VALUES IN ('India', 'Singapore'); +\d person_americas; + Table "public.person_americas" + Column | Type | Collation | Nullable | Default +-----------+---------+-----------+----------+--------- + person_id | integer | | not null | + country | text | | not null | + name | text | | | + age | integer | | | +Partition of: person FOR VALUES IN ('United States', 'Brazil', 'Mexico', 'Columbia') +Indexes: + "person_americas_pkey" PRIMARY KEY, lsm ((person_id, country) HASH) + +\d person_apac; + Table "public.person_apac" + Column | Type | Collation | Nullable | Default +-----------+---------+-----------+----------+--------- + person_id | integer | | not null | + country | text | | not null | + name | text | | | + age | integer | | | +Partition of: person FOR VALUES IN ('India', 'Singapore') +Indexes: + "person_apac_pkey" PRIMARY KEY, lsm ((person_id, country) HASH) + +INSERT INTO person_americas VALUES (1, 'United States', 'Jane Doe', 23); +-- Fail due to primary key constraint failure. +INSERT INTO person_americas VALUES (1, 'United States', 'Jane Doe', 23); +ERROR: duplicate key value violates unique constraint "person_americas_pkey" +EXPLAIN (COSTS OFF) SELECT * FROM person WHERE person_id=1 AND country='United States'; + QUERY PLAN +----------------------------------------------------------------------------- + Append + -> Index Scan using person_americas_pkey on person_americas + Index Cond: ((person_id = 1) AND (country = 'United States'::text)) +(3 rows) + +-- Range partitioning where primary key contains only range components. +CREATE TABLE parted (a int, b text, PRIMARY KEY (a ASC, b DESC)) PARTITION BY RANGE(a); +CREATE TABLE part_a_1_5 PARTITION OF parted (a, b) FOR VALUES FROM (1) TO (5); +CREATE TABLE part_a_5_10 PARTITION OF parted (a, b) FOR VALUES FROM (5) TO (10); +\d part_a_1_5; + Table "public.part_a_1_5" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | not null | + b | text | | not null | +Partition of: parted FOR VALUES FROM (1) TO (5) +Indexes: + "part_a_1_5_pkey" PRIMARY KEY, lsm (a ASC, b DESC) + +\d part_a_5_10; + Table "public.part_a_5_10" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | not null | + b | text | | not null | +Partition of: parted FOR VALUES FROM (5) TO (10) +Indexes: + "part_a_5_10_pkey" PRIMARY KEY, lsm (a ASC, b DESC) + +INSERT INTO parted VALUES (1, '1'); +-- Fail +INSERT INTO parted VALUES (1, '1'); +ERROR: duplicate key value violates unique constraint "part_a_1_5_pkey" +EXPLAIN (COSTS OFF) SELECT * FROM parted WHERE a = 1 ORDER BY b DESC; + QUERY PLAN +------------------------------------------------------ + Merge Append + Sort Key: part_a_1_5.b DESC + -> Index Scan using part_a_1_5_pkey on part_a_1_5 + Index Cond: (a = 1) +(4 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM parted ORDER BY a; + QUERY PLAN +-------------------------------------------------------- + Merge Append + Sort Key: part_a_1_5.a + -> Index Scan using part_a_1_5_pkey on part_a_1_5 + -> Index Scan using part_a_5_10_pkey on part_a_5_10 +(4 rows) + +-- Test creating a partition with a different primary key. +CREATE TABLE part_a_15_20 PARTITION OF parted (a, b, PRIMARY KEY(a HASH)) FOR VALUES FROM (15) TO (20); +ERROR: multiple primary keys for table "part_a_15_20" are not allowed +-- Test attaching a partition with a different primary key. +CREATE TABLE part_a_20_25 (a int, b text NOT NULL, PRIMARY KEY (a ASC)); +ALTER TABLE parted ATTACH PARTITION part_a_20_25 FOR VALUES FROM (20) TO (25); +ERROR: multiple primary keys for table "part_a_20_25" are not allowed +-- Test a complicated situation where the attribute numbers of partition tables +-- and partitioned tables are different. +CREATE TABLE col_order_change ( + a text, + b bigint, + c numeric, + d int, + e varchar, + PRIMARY KEY ((a,b) HASH, c ASC, d DESC) +) PARTITION BY RANGE (a, b); +CREATE TABLE col_order_change_part (e varchar, c numeric, a text, b bigint, d int NOT NULL); +ALTER TABLE col_order_change_part DROP COLUMN e, DROP COLUMN c, DROP COLUMN a; +ALTER TABLE col_order_change_part ADD COLUMN c numeric NOT NULL, ADD COLUMN e varchar NOT NULL, ADD COLUMN a text NOT NULL; +ALTER TABLE col_order_change_part DROP COLUMN b; +ALTER TABLE col_order_change_part ADD COLUMN b bigint NOT NULL; +ALTER TABLE col_order_change ATTACH PARTITION col_order_change_part FOR VALUES FROM ('a', 10) TO ('a', 20); +\d col_order_change_part; + Table "public.col_order_change_part" + Column | Type | Collation | Nullable | Default +--------+-------------------+-----------+----------+--------- + d | integer | | not null | + c | numeric | | not null | + e | character varying | | not null | + a | text | | not null | + b | bigint | | not null | +Partition of: col_order_change FOR VALUES FROM ('a', '10') TO ('a', '20') +Indexes: + "col_order_change_part_pkey" PRIMARY KEY, lsm ((a, b) HASH, c ASC, d DESC) + +-- Cleanup +DROP TABLE col_order_change; +DROP TABLE parted; +DROP TABLE person; +DROP TABLE test; +DROP TABLE part_a_20_25; diff --git a/src/postgres/src/test/regress/expected/yb_pg_partition_update.out b/src/postgres/src/test/regress/expected/yb_pg_partition_update.out index 578b3a8b570f..8b3911a8561d 100644 --- a/src/postgres/src/test/regress/expected/yb_pg_partition_update.out +++ b/src/postgres/src/test/regress/expected/yb_pg_partition_update.out @@ -708,3 +708,5 @@ SELECT * FROM parted ORDER BY a; 6 | 6 8 | 6 (5 rows) + +DROP TABLE parted; diff --git a/src/postgres/src/test/regress/sql/yb_partition_pk.sql b/src/postgres/src/test/regress/sql/yb_partition_pk.sql new file mode 100644 index 000000000000..458433993b3f --- /dev/null +++ b/src/postgres/src/test/regress/sql/yb_partition_pk.sql @@ -0,0 +1,107 @@ +-- Verify that for all 3 types of partitioning, the primary key +-- is correctly inherited by the partitions. +-- Hash partitioning where primary key contains both hash and range components. +CREATE TABLE test(h1 int NOT NULL, + h2 int NOT NULL, + h3 int NOT NULL, + v1 int, + primary key(h1, h2, h3)) +PARTITION BY HASH(h1); + +-- Create a partition table. +CREATE TABLE test_1 PARTITION of test FOR VALUES WITH (modulus 2, remainder 0); + +-- Create a table without a primary key and attach it as a partition. +CREATE TABLE test_2(h1 int NOT NULL, + h2 int NOT NULL, + h3 int NOT NULL, + v1 int); +ALTER TABLE test ATTACH PARTITION test_2 FOR VALUES WITH (modulus 2, remainder 1); + +\d test_1; +\d test_2; + +INSERT INTO test VALUES (1, 4, 3); + +-- Fail because primary key constraint is violated. +INSERT INTO test VALUES (1, 4, 3); +EXPLAIN (COSTS OFF) SELECT * FROM test WHERE h1 = 1 AND h2 = 4 AND h3 = 3; +EXPLAIN (COSTS OFF) SELECT * FROM test WHERE h1 = 1 ORDER BY h2; +EXPLAIN (COSTS OFF) SELECT * FROM test WHERE h1 = 1 AND h2 = 4 ORDER BY h3; + +-- LIST partitioning where primary key contain only hash components. +CREATE TABLE person ( + person_id int not null, + country text, + name text, + age int, + PRIMARY KEY((person_id, country) HASH)) +PARTITION BY LIST (country); + +CREATE TABLE person_americas + PARTITION OF person + FOR VALUES IN ('United States', 'Brazil', 'Mexico', 'Columbia'); + +CREATE TABLE person_apac ( + person_id int not null, + country text not null, + name text, + age int); +ALTER TABLE person ATTACH PARTITION person_apac FOR VALUES IN ('India', 'Singapore'); + +\d person_americas; +\d person_apac; + +INSERT INTO person_americas VALUES (1, 'United States', 'Jane Doe', 23); +-- Fail due to primary key constraint failure. +INSERT INTO person_americas VALUES (1, 'United States', 'Jane Doe', 23); + +EXPLAIN (COSTS OFF) SELECT * FROM person WHERE person_id=1 AND country='United States'; + +-- Range partitioning where primary key contains only range components. +CREATE TABLE parted (a int, b text, PRIMARY KEY (a ASC, b DESC)) PARTITION BY RANGE(a); +CREATE TABLE part_a_1_5 PARTITION OF parted (a, b) FOR VALUES FROM (1) TO (5); +CREATE TABLE part_a_5_10 PARTITION OF parted (a, b) FOR VALUES FROM (5) TO (10); + +\d part_a_1_5; +\d part_a_5_10; + +INSERT INTO parted VALUES (1, '1'); + +-- Fail +INSERT INTO parted VALUES (1, '1'); +EXPLAIN (COSTS OFF) SELECT * FROM parted WHERE a = 1 ORDER BY b DESC; +EXPLAIN (COSTS OFF) SELECT * FROM parted ORDER BY a; + +-- Test creating a partition with a different primary key. +CREATE TABLE part_a_15_20 PARTITION OF parted (a, b, PRIMARY KEY(a HASH)) FOR VALUES FROM (15) TO (20); + +-- Test attaching a partition with a different primary key. +CREATE TABLE part_a_20_25 (a int, b text NOT NULL, PRIMARY KEY (a ASC)); +ALTER TABLE parted ATTACH PARTITION part_a_20_25 FOR VALUES FROM (20) TO (25); + +-- Test a complicated situation where the attribute numbers of partition tables +-- and partitioned tables are different. +CREATE TABLE col_order_change ( + a text, + b bigint, + c numeric, + d int, + e varchar, + PRIMARY KEY ((a,b) HASH, c ASC, d DESC) +) PARTITION BY RANGE (a, b); + +CREATE TABLE col_order_change_part (e varchar, c numeric, a text, b bigint, d int NOT NULL); +ALTER TABLE col_order_change_part DROP COLUMN e, DROP COLUMN c, DROP COLUMN a; +ALTER TABLE col_order_change_part ADD COLUMN c numeric NOT NULL, ADD COLUMN e varchar NOT NULL, ADD COLUMN a text NOT NULL; +ALTER TABLE col_order_change_part DROP COLUMN b; +ALTER TABLE col_order_change_part ADD COLUMN b bigint NOT NULL; +ALTER TABLE col_order_change ATTACH PARTITION col_order_change_part FOR VALUES FROM ('a', 10) TO ('a', 20); +\d col_order_change_part; + +-- Cleanup +DROP TABLE col_order_change; +DROP TABLE parted; +DROP TABLE person; +DROP TABLE test; +DROP TABLE part_a_20_25; diff --git a/src/postgres/src/test/regress/sql/yb_pg_partition_update.sql b/src/postgres/src/test/regress/sql/yb_pg_partition_update.sql index 0fe7f2bab210..8419ac79c63b 100644 --- a/src/postgres/src/test/regress/sql/yb_pg_partition_update.sql +++ b/src/postgres/src/test/regress/sql/yb_pg_partition_update.sql @@ -519,3 +519,4 @@ EXPLAIN UPDATE parted SET b='6' WHERE a > 1; UPDATE parted SET b='6' WHERE a > 1; -- Verify that the updates happened successfully. SELECT * FROM parted ORDER BY a; +DROP TABLE parted; diff --git a/src/postgres/src/test/regress/yb_pg_partitions_schedule b/src/postgres/src/test/regress/yb_pg_partitions_schedule index 7aae3cc9d982..e623c404b898 100644 --- a/src/postgres/src/test/regress/yb_pg_partitions_schedule +++ b/src/postgres/src/test/regress/yb_pg_partitions_schedule @@ -10,3 +10,4 @@ test: yb_pg_partition_aggregate test: yb_pg_partition_update test: yb_partition_fk +test: yb_partition_pk